##### Title: "Rebel Presence and Border Demarcation"  #####


#### Clear work space ----
rm(list = ls())

#### Libraries ----
# Core data manipulation and visualization
library(tidyverse)     # Loads dplyr, ggplot2, readr, tibble, etc.
library(ggplot2)       # Explicitly included for emphasis on visualization

# Statistical modeling and estimation
library(MASS)          # Linear and nonlinear models, stepwise selection
library(survival)      # Survival analysis
library(lfe)           # Fixed effects models (felm)
library(marginaleffects) # Marginal effects estimation and visualization
library(ggeffects)     # Predicted values from models, ggplot-compatible
library(broom)         # Tidy model outputs
library(boot)          # Bootstrap estimation

# Table generation
library(modelsummary)  # Summary tables for models 
library(gridExtra)     # Arrange multiple ggplots
library(grid)          # Low-level graphics system

# Data import
library(haven)         # Read Stata files


#### Load datasets ----
ibad <- read_dta("Replication - IBAD Full Settle Dyad-Year.dta") #For Fig.1
icow <- read.csv("icow_jpr_replication_data_claim_years.csv") #For Fig.1
stat <- read.csv("states2016.csv") #For Fig.1                             
df1 <- read.csv("dataH1.csv") # For Tab.1
df2 <- read.csv("dataH2.csv") # For Tab.2

#### Figure 1 ----
### (A)
# Prepare claim data (border disputes per year)
icow <- icow %>%
  mutate(begclaimyear = floor(begclaim / 100),
         endclaimyear = floor(endclaim / 100)) %>%
  filter(begclaimyear > 1945)

claim_counts <- icow %>%
  group_by(begclaimyear) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  rename(year = begclaimyear) %>%
  mutate(Type = "Onset of Border Disputes")  # Changing a new label

# Prepare state data (number of sovereign states per year)
stat_count <- stat %>%
  group_by(styear) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  filter(styear > 1945, styear < 2002) %>%
  rename(year = styear)

# Scaling sovereign states data
max_claims <- max(claim_counts$count, na.rm = TRUE)
max_states <- max(stat_count$count, na.rm = TRUE)
scale_factor <- max_claims / max_states

stat_count_scaled <- stat_count %>%
  mutate(count = count * scale_factor,
         Type = "Number of New Sovereign States")  

# Combine data (for legend consistency)
combined_data <- bind_rows(
  claim_counts %>% dplyr::select(year, count, Type),
  stat_count_scaled %>% dplyr::select(year, count, Type)
)

# Plot
Fig1A <- ggplot() +
  geom_bar(data = claim_counts, aes(x = year, y = count, fill = Type),
           stat = "identity", alpha = 0.7) +
  geom_line(data = stat_count_scaled, aes(x = year, y = count, color = Type),
            size = 1.2) +
  scale_x_continuous(breaks = seq(1950, max(claim_counts$year, na.rm = TRUE), by = 10)) +
  scale_y_continuous(
    name = "Onset of Border Disputes",  
    sec.axis = sec_axis(~ . / scale_factor, name = "Number of New Sovereign States") 
  ) +
  scale_fill_manual(values = c("Onset of Border Disputes" = "steelblue")) +  
  scale_color_manual(values = c("Number of New Sovereign States" = "firebrick")) +  
  labs(
    title = "(A)",
    x = "Year",
    y = "Count",
    fill = "", color = ""  
  ) +
  theme_minimal(base_family = "sans") +
  theme(
    plot.title = element_text(color = "black", size = 12, face = "bold"),
    axis.title = element_text(color = "black"),
    axis.text = element_text(color = "black"),
    axis.title.y.right = element_text(color = "black"),
    axis.text.y.right = element_text(color = "black"),
    legend.position = "top",
    legend.text = element_text(color = "black"),
    plot.background  = element_rect(fill = "aliceblue", color = NA),
    panel.background = element_rect(fill = "white", color = NA)
  )
###(B)
# Create missing variable
ibadfig1 <- ibad %>%
  mutate(missing = ifelse(!is.na(settle), 0, 1))

# Create region variable
ibadfig1 <- ibadfig1 %>%
  mutate(region = case_when(
    ccode1 < 199 ~ 1,   # Americas
    ccode1 >= 199 & ccode1 < 400 ~ 2,  # Europe
    ccode1 >= 400 & ccode1 < 600 ~ 3,  # Africa
    ccode1 >= 600 & ccode1 < 700 ~ 4,  # Middle East
    ccode1 >= 700 ~ 5  # Asia
  ))

# Create regional dummy variables
ibadfig1 <- ibadfig1 %>%
  mutate(
    Americas    = ifelse(region == 1, 1, 0),
    Europe      = ifelse(region == 2, 1, 0),
    Africa      = ifelse(region == 3, 1, 0),
    MiddleEast  = ifelse(region == 4, 1, 0),
    Asia        = ifelse(region == 5, 1, 0),
    didyad      = 1  # Assign 1 to all observations
  )

# Yearly aggregation
settlement_summary <- ibadfig1 %>%
  group_by(year) %>%
  summarise(
    total_missing = sum(missing, na.rm = TRUE),
    total_dyads = sum(didyad, na.rm = TRUE),
    total_settle1 = sum(settle, na.rm = TRUE),
    total_settleratio1 = total_settle1 / (total_dyads - total_missing)
  )

# Settlement ratios by region
region_summary <- ibadfig1 %>%
  group_by(year) %>%
  summarise(
    total_settle_Amer = sum(settle * Americas, na.rm = TRUE),
    total_missing_Amer = sum(missing * Americas, na.rm = TRUE),
    total_Amer = sum(Americas, na.rm = TRUE),
    total_ratio_Amer = total_settle_Amer / (total_Amer - total_missing_Amer),
    
    total_settle_Eur = sum(settle * Europe, na.rm = TRUE),
    total_missing_Eur = sum(missing * Europe, na.rm = TRUE),
    total_Eur = sum(Europe, na.rm = TRUE),
    total_ratio_Eur = total_settle_Eur / (total_Eur - total_missing_Eur),
    
    total_settle_Afr = sum(settle * Africa, na.rm = TRUE),
    total_missing_Afr = sum(missing * Africa, na.rm = TRUE),
    total_Afr = sum(Africa, na.rm = TRUE),
    total_ratio_Afr = total_settle_Afr / (total_Afr - total_missing_Afr),
    
    total_settle_ME = sum(settle * MiddleEast, na.rm = TRUE),
    total_missing_ME = sum(missing * MiddleEast, na.rm = TRUE),
    total_ME = sum(MiddleEast, na.rm = TRUE),
    total_ratio_ME = total_settle_ME / (total_ME - total_missing_ME),
    
    total_settle_Asia = sum(settle * Asia, na.rm = TRUE),
    total_missing_Asia = sum(missing * Asia, na.rm = TRUE),
    total_Asia = sum(Asia, na.rm = TRUE),
    total_ratio_Asia = total_settle_Asia / (total_Asia - total_missing_Asia)
  ) %>% 
  filter(year>1945)

region_long <- region_summary %>%
  dplyr::select(year, total_ratio_Amer, total_ratio_Eur, total_ratio_Afr, total_ratio_ME, total_ratio_Asia) %>%
  pivot_longer(cols = -year, names_to = "Region", values_to = "Settlement_Ratio") %>%
  mutate(Region = case_when(
    Region == "total_ratio_Amer" ~ "Americas",
    Region == "total_ratio_Eur"  ~ "Europe",
    Region == "total_ratio_Afr"  ~ "Africa",
    Region == "total_ratio_ME"   ~ "Middle East",
    Region == "total_ratio_Asia" ~ "Asia",
    TRUE ~ Region
  ))

# Create plot
Fig1B <- ggplot(region_long, aes(x = year, y = Settlement_Ratio, color = Region)) +
  geom_line(size = 1, show.legend = TRUE) +  
  labs(
    title = "(B)",
    x = "Year",
    y = "% of Contiguous Dyads with Settled Borders",
    color = "Region"   
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(color = "black", size = 12, face = "bold"),
    axis.title = element_text(color = "black"),
    axis.text = element_text(color = "black"),
    axis.title.y.right = element_text(color = "black"),
    axis.text.y.right = element_text(color = "black"),
    legend.position = "top",
    legend.text = element_text(color = "black"),
    
    plot.background  = element_rect(fill = "aliceblue", color = NA), 
    panel.background = element_rect(fill = "white", color = NA) 
  )

# Arrange side-by-side
combined <- grid.arrange(Fig1A, Fig1B, ncol = 2)

# Create a caption
caption_bg1 <- gTree(children = gList(
  rectGrob(
    gp = gpar(fill = "aliceblue", col = NA)  
  ),
  textGrob(
    "(A) Onset of Border Disputes and New Sovereign States, 1946-2001; (B) Dyadic Border Settlements by Year, 1946-2001",
    gp = gpar(fontsize = 14),
    hjust = 0, x = 0                         
  )
))

caption_bg2 <- gTree(children = gList(
  rectGrob(
    gp = gpar(fill = "aliceblue", col = NA)  
  ),
  textGrob(
    "Source: Adapted by the author from Frederick et al. (2017) for (A) and Owsiak et al. (2017) for (B).",
    gp = gpar(fontsize = 14),
    hjust = 0, x = 0                         
  )
))
combined1 <- arrangeGrob(
  combined,
  caption_bg1,
  ncol = 1,
  heights = unit.c(unit(1, "npc") - unit(1, "lines"), unit(1, "lines"))
)
combined2 <- arrangeGrob(
  combined1,
  caption_bg2,
  ncol = 1,
  heights = unit.c(unit(1, "npc") - unit(1, "lines"), unit(1, "lines"))
)
plot(combined2)

##### Table 1 ----
# Data preparation
df1 <- df1 %>%
  group_by(dyad) %>%
  arrange(year) %>%
  mutate(DyadLife = row_number()) %>%
  ungroup()

df1.2 <- df1 %>%
  mutate(Rebel = if_else(support == 1, 0, rebel)) # Rebel presence adjusted for gov. support

df1.3 <- df1 %>% 
  rename("Rebel"=rebel)

model1h1a <-  clogit(ICOWterrclaim ~ Rebel + 
                      + jtpolity + defense
                    + lncincratio + coldwar + strata(dyad), data=df1.2)
model2h1a <-  clogit(ICOWterrclaim ~ Rebel +
                      + jtpolity+ defense 
                    + lncincratio + coldwar + 
                      DyadLife + I(DyadLife^2) + I(DyadLife^3) +strata(dyad), data=df1.2)
model3h1b <-  clogit(ICOWterrclaim ~ Rebel + Rebel:support +
                   jtpolity+ defense +
                   +lncincratio + coldwar + strata(dyad), data = df1.3)
model4h1b <-  clogit(ICOWterrclaim ~ Rebel + Rebel:support 
                 + jtpolity+ defense 
                 + lncincratio + coldwar + 
                   DyadLife + I(DyadLife^2) + I(DyadLife^3) +strata(dyad), data=df1.3)

summary(model1h1a)
summary(model2h1a)
summary(model3h1b)
summary(model4h1b)

modelh1 <- list(
  "Model1" = model1h1a,
  "Model2" = model2h1a,
  "Model3" = model3h1b,
  "Model4" = model4h1b
)
tab1 <- modelsummary(modelh1,
                     output="results/tab1.docx",
                     stars = c("*" = 0.1, "**" = 0.05, "***" = 0.01),  
                     coef_map = c("Rebel" = "Rebel Existance",
                                  "Rebel:support" = "Rebel × Support",
                                  "jtpolity" = "Joint Democracy",  
                                  "defense" = "Allied", 
                                  "lncincratio" = "Capacity Ratio (ln)", 
                                  "coldwar" = "Cold War",
                                  "DyadLife" = "Dyad Life",
                                  "I(DyadLife^2)"= "Dyad Life²",
                                  "I(DyadLife^3)"= "Dyad Life³"),
                     gof_map = c("nobs" = "Observations", 
                                 "r.squared" = "R²", 
                                 "logLik" = "Log Likelihood",
                                 "AIC" = "AIC",
                                 "BIC" = "BIC"),
                     notes = "Conditional logistic regression; standard errors in parentheses.",
                     fmt = 3
)


####Table 2----
model1h2 <- felm(
  oneside_best_log ~ claimper + terrdum + 
    strengthcent_ord + rebstrength_ord + nonmilsupport + 
    l1gdppc_log + l1gdppc_change | 
    countrynum,  
  data = df2
)
summary(model1h2)

model2h2 <- felm(
  oneside_best_log ~ claimper + terrdum +
    strengthcent_ord + rebstrength_ord + nonmilsupport + 
    l1gdppc_log + l1gdppc_change| countrynum + year,
  data = df2
)
summary(model2h2)



modelh2 <- list(
  "Model 1" = model1h2,
  "Model 2" = model2h2
)

tab2 <- modelsummary(modelh2,
                     stars = c("*" = 0.1, "**" = 0.05, "***" = 0.01), 
                     output="results/tab2.docx",
                     coef_map = c("claimper" = "% of Border Disputed States", 
                                  "exterrdum_low" = "Foreign Territory",
                                  "terrdum" = "Domestic Territory",  
                                  "strengthcent_ord" = "Central Command Strength", 
                                  "rebstrength_ord" = "Rebel strength", 
                                  "nonmilsupport" = "Nonmilitary support",
                                  "l1gdppc_log"= "GDP per capita(ln)",
                                  "l1gdppc_change" = "GDP per capita growth(ln)"),
                     gof_map = c("nobs" = "Observations", 
                                 "r.squared" = "R²", 
                                 "logLik" = "Log Likelihood"),
                     notes = "OLS with fixed effect; standard errors in parentheses.",
                     fmt = 3
)

##### Supplementary File #####
##Table S.1
Rh1 <- read.csv("data/AppendixH1.csv")

Rm1h1 <- clogit(proposelag ~ rebel +  lncinc + lncincratio + tcpop_dummy + tcresource + 
                  v2x_libdem + tcstratloc 
                + strata(claimproposer), data=Rh1)
Rm2h1 <- clogit(proposelag ~ rebel + rebel:support + lncinc + lncincratio + tcpop_dummy + tcresource + 
                  v2x_libdem + tcstratloc 
                + strata(claimproposer), data=Rh1)
Rm3h1 <- clogit(proposelag ~ rebel + rebel:support + lncinc + lncincratio + tcpop_dummy + tcresource + 
                  v2x_libdem + tcstratloc 
                + duration + I(duration^2) + I(duration^3)+
                  strata(claimproposer), data=Rh1)


Rh1list <- list(
  "Model 1" = Rm1h1,
  "Model 2" = Rm2h1,
  "Model 3" = Rm3h1
)

modelsummary(
  Rh1list,
  output = "results/supplement1.docx",
  fmt = 3,
  stars = c("*"=0.1,"**"=0.05,"***"=0.01),
  coef_map = c(
    "rebel"        = "Rebel Existence",
    "rebel:support"      = "Rebel × Support",
    "lncinc"       = "Host State Capacity (ln)",
    "lncincratio"  = "Capacity Ratio (ln)",
    "v2x_libdem"   = "Democracy",
    "tcpop_dummy"  = "Population",
    "tcresource"   = "Natural Resource",
    "tcstratloc"   = "Strategic Territory",
    "duration"     = "Duration",
    "I(duration^2)"= "Duration²",
    "I(duration^3)"= "Duration³"
  ),
  gof_map = c("nobs" = "Observations","logLik" = "Log Likelihood"),
  notes = "Conditional logistic regression; standard errors in parentheses."
)

##Table S.2
df1r <- df1.2

# Convert NSA strength into a categorical variable with descriptive labels
df1r$nsabalance_cat <- factor(df1.2$nsa_strong,
                              levels = c(1, 2, 3, 4, 5),
                              labels = c("Much Weaker", "Weaker", "Parity", "Stronger", "Much Stronger"))

# Create a linear version of NSA strength
df1r$nsa_strong_linear <- df1r$nsa_strong

# Define NSA absence (originally NA) as 0 for linear modeling
df1r$nsa_strong_linear[is.na(df1r$nsa_strong)] <- 0


# Estimate a conditional logit model using NSA strength as a linear predictor
model_linear_split <- clogit(ICOWterrclaim ~ nsa_strong_linear +
                               jtpolity + defense + lncincratio + coldwar +
                               strata(dyad), data = df1r)

# Recode NSA absence (originally NA) as a new category labeled "Absent"
df1r$nsabalance_cat2 <- as.character(df1r$nsabalance_cat)
df1r$nsabalance_cat2[is.na(df1r$nsa_strong)] <- "Absent"

# Convert to factor and set "Absent" as the reference category
df1r$nsabalance_cat2 <- factor(df1r$nsabalance_cat2,
                               levels = c("Absent", "Much Weaker", "Weaker", "Parity", "Stronger", "Much Stronger"))

# Estimate a conditional logit model using NSA strength as a categorical predictor
model_with_absent <- clogit(ICOWterrclaim ~ nsabalance_cat2 +
                              jtpolity + defense + lncincratio + coldwar +
                              strata(dyad), data = df1r)

summary(model_with_absent)
summary(model_linear_split)

Rh1.2list <- list(
  "A: Linear (Rebel Strength)" = model_linear_split,
  "B: Categorical (Rebel Strength, Absent as Baseline)" = model_with_absent
)

# Generate the table and export to Word
modelsummary(
  Rh1.2list,
  output = "results/supplement2.docx",
  fmt = 3,
  stars = c("*"=0.1,"**"=0.05,"***"=0.01),
  coef_map = c( 
    "nsa_strong_linear" = "Rebel Strength (Linear)",
    "nsabalance_cat2Much Weaker" = "Rebel: Much Weaker",
    "nsabalance_cat2Weaker" = "Rebel: Weaker",
    "nsabalance_cat2Parity" = "Rebel: Parity",
    "nsabalance_cat2Stronger" = "Rebel: Stronger",
    "nsabalance_cat2Much Stronger" = "Rebel: Much Stronger",
    "jtpolity" = "Joint Democracy",
    "defense" = "Allied",
    "lncincratio" = "Capacity Ratio (ln)",
    "coldwar" = "Cold War"
  ),
  gof_map = c("nobs" = "Observations","logLik" = "Log Likelihood"),
  notes = "Exponentiated coefficients (odds ratios) shown. In the categorical model, 'Absent' is the reference category."
)

##Table S.3
#library
library(plm)
library(lfe)
library(fixest)


## Fixed effects: country only
model_plm <- plm(
  oneside_best_log ~ claimper + terrdum + strengthcent_ord + rebstrength_ord + nonmilsupport + l1gdppc_log + l1gdppc_change,
  data = df2,
  index = c("countrynum", "year"),
  model = "within"
)
summary(model_plm)

## Fixed effects: country and year
model_plm2 <- plm(
  oneside_best_log ~ claimper + terrdum + strengthcent_ord + rebstrength_ord + nonmilsupport + l1gdppc_log + l1gdppc_change,
  data = df2,
  model = "within",
  effect = "twoways",
  index = c("countrynum", "year")
)
summary(model_plm2)

## felm with clustered standard errors by country
model_felm_cluster <- felm(
  oneside_best_log ~ claimper + terrdum + strengthcent_ord + rebstrength_ord + nonmilsupport + l1gdppc_log + l1gdppc_change|
    countrynum + year | 0 | countrynum,
  data = df2
)
summary(model_felm_cluster)

# fixest version with country-year fixed effects and clustering
model_feols <- feols(
  oneside_best_log ~ claimper + terrdum + strengthcent_ord + rebstrength_ord + nonmilsupport + l1gdppc_log + l1gdppc_change| countrynum + year,
  data = df2
)
summary(model_feols)


Rh2list <- list(
  "Model A" = model_plm,
  "Model B" = model_plm2,
  "Model C" = model_felm_cluster,
  "Model D" = model_feols
)

modelsummary(Rh2list,
             stars = c("*" = 0.1, "**" = 0.05, "***" = 0.01), 
             output = "results/supplement3.docx",
             coef_map = c(
               "claimper" = "% of Border Disputed States", 
               "terrdum" = "Domestic Territory",  
               "strengthcent_ord" = "Central Command Strength", 
               "rebstrength_ord" = "Rebel Strength", 
               "nonmilsupport" = "Nonmilitary Support",
               "l1gdppc_log" = "GDP per capita(ln)",
               "l1gdppc_change" = "GDP per capita growth(ln)"
             )
             ,
             gof_map = c("nobs" = "Observations", 
                         "r.squared" = "R²", 
                         "logLik" = "Log Likelihood"),
             notes = "Standard errors in parentheses.
             All models include country and year fixed effects unless otherwise noted. Clustered standard errors are computed at the country level where applicable.",
             fmt = 3
)

##Table S4
## Random effects model
model_random <- plm(
  oneside_best_log ~ claimper + terrdum + strengthcent_ord + rebstrength_ord + nonmilsupport + l1gdppc_log + l1gdppc_change,
  data = df2,
  index = c("countrynum", "year"),
  model = "random"
)

# Fixed effects model (already estimated as model_plm)
# Hausman test: FE vs RE
hausman_test <- phtest(model_plm, model_random)
#output
summary(model_random)
hausman_test

# list
Rh2_RElist <- list(
  "Fixed Effects (plm)" = model_plm,
  "Random Effects (plm)" = model_random
)

# output
modelsummary(
  Rh2_RElist,
  stars = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
  output="results/supplement4.docx",
  coef_map = c(
    "claimper" = "% of Border Disputed States",
    "terrdum" = "Domestic Territory",
    "strengthcent_ord" = "Central Command Strength",
    "rebstrength_ord" = "Rebel strength",
    "nonmilsupport" = "Nonmilitary support",
    "l1gdppc_log" = "GDP per capita(ln)",
    "l1gdppc_change" = "GDP per capita growth(ln)"
  ),
  gof_map = c(
    "nobs" = "Observations",
    "r.squared" = "R²",
    "logLik" = "Log Likelihood"
  ),
  notes = paste(
    "Standard errors in parentheses.",
    "Fixed effects and random effects models estimated using plm.",
    "Hausman test favors fixed effects (p < 0.05), suggesting correlation between regressors and unit-specific effects."
  ),
  fmt = 3
)
